###### Maintext: Figure 4
###### Tax Prevalence and Certainty for Most Common Taxes
gc(); rm(list = ls()); set.seed(12345)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # Note: if you are not using R Studio this command will not work, set WD to source file location manually
require(dplyr)
require(tidyr)
require(stargazer)
require(plotrix)
require(gmodels)
require(ggplot2)
source("functions.R")

## NOTE: This script also produces the robustness figure
## in the appendix at the same time (Appendix Figure B5)

######################## SURVEY EXPERIMENT ################################

df <- read.csv("data/cleaned/uganda_jun18_cleaned.csv", 
               stringsAsFactors = F)

## Define some variables and subset data

directVars <- c("income",  "local", "corporate", "property", "business")
indirectVars <- c("vat", "import", "excise")
paidDirect <- paste0("tm_paid_", directVars)
paidIndirect <- paste0("tm_paid_", indirectVars)

cerDirect <- paste0("tm_", directVars, "_certain")
cerIndirect <- paste0("tm_", indirectVars, "_certain")


## Certainty among people who paid

dfList <- list(df, df[rowSums(df[, paidDirect], na.rm = T) > 0, ])
names(dfList) <- c("all", "direct_only")

for(i in 1:length(dfList)){
  
  ## Descrptive statistics
  
  stargazer(dfList[[i]][, c(paidDirect, paidIndirect)], summary = T, digits = 2, type = "text",
            title = "Tax Proportions (Full Sample)")
  
  ## Certainty plot
  
  dfPlot <- as.data.frame(t(apply(dfList[[i]][, c(cerDirect, cerIndirect)], 2, FUN = ci, na.rm = T)))
  dfPlot$tax_type <- c(rep("Direct", 5), 
                       rep("Indirect", 3))
  dfPlot$term <- rownames(dfPlot)
  rownames(dfPlot) <- NULL
  colnames(dfPlot) <- gsub("[[:blank:]]", "", (tolower(colnames(dfPlot))))
  dfPlot <- dfPlot[order(dfPlot$estimate), ]
  dfPlot$term <- factor(dfPlot$term, levels = dfPlot$term)
  dfPlot <- dfPlot[!dfPlot$term %in% c("tm_corporate_certain", "tm_import_certain", 
                                       "tm_property_certain"), ]
  dfPlot$tax_name <- c("Excise", "VAT", "Local", "Income", "Business")
  dfPlot$tax_name <- factor(dfPlot$tax_name, levels = dfPlot$tax_name)
  
  pCert <- ggplot(data = dfPlot, mapping = aes(x = tax_name, y = estimate, color = tax_type)) + 
    geom_linerange(aes(x = tax_name, ymin = cilower,
                       ymax = ciupper, size = I(10)),
                   lwd = 1.25, position = position_dodge(width = 1/2)) +
    geom_pointrange(aes(x = tax_name, y = estimate, ymin =  cilower,
                        ymax = ciupper),
                    lwd = 0.8, position = position_dodge(width = 1/2),
                    shape = 21, fill = "WHITE", 
                    fatten = 3) + 
    ylim(3.5, 8.5) + 
    coord_flip() +  xlab("") + 
    theme_custom(legend_position = "none", 
                 title_size = 12) + #legend_position = c(0.95, 0.1)
    scale_colour_grey(start = 0.5, end = 0.2) + 
    labs(colour = "Tax Type", tag = "B") + ylab("Certainty (1-10)") +
    ggtitle("Confidence in Estimating Tax Burden")  
    

  ### Bar Plot
  
  dfPlot <- as.data.frame(t(apply(dfList[[i]][, c(paidDirect, paidIndirect)], 2, FUN = ci, na.rm = T)))
  dfPlot$tax_type <- c(rep("Direct", 5), 
                       rep("Indirect", 3))
  dfPlot$bar_color <- ifelse(dfPlot$tax_type == "Direct", "lightgray", "darkgray")
  dfPlot$term <- rownames(dfPlot)
  rownames(dfPlot) <- NULL
  colnames(dfPlot) <- gsub("[[:blank:]]", "", (tolower(colnames(dfPlot))))
  dfPlot <- dfPlot[order(dfPlot$estimate), ]
  dfPlot$term <- factor(dfPlot$term, levels = dfPlot$term)
  dfPlot <- dfPlot[!dfPlot$term %in% c("tm_paid_corporate", "tm_paid_import", 
                                       "tm_paid_property"), ]
  dfPlot$tax_name <- c("Income", "Business", "Local", "Excise", "VAT")
  dfPlot$tax_name <- factor(dfPlot$tax_name, levels = rev(dfPlot$tax_name))
  
  pProp <- ggplot(dfPlot, aes(x = tax_name, y = estimate, 
                              fill = tax_type)) + 
    geom_col() + 
    theme_custom(legend_position = c(0.95, 0.65), 
                 title_size = 12) + 
    geom_errorbar(aes(ymin = cilower, ymax = ciupper), 
                  width = 0.2) + 
    xlab("Tax Name") + ylab("Proportion Paying Tax") + 
    labs(fill = "Tax Type", tag = "A") + 
    scale_fill_grey(start = 0.5, end = 0.2) + 
    ggtitle("Prevalence of Key Taxes")
 
  
  require(ggpubr)
  pComb <- ggarrange(pProp, pCert, common.legend = F)
  pdf(file = paste0("figures/appendix/tax_combined_", names(dfList)[i], ".pdf"), 
      width = 10, height = 6)
  print(pComb)
  dev.off() 
  
  if(names(dfList)[i] == "all"){
    pdf(file = paste0("figures/fig_4.pdf"), 
        width = 10, height = 6)
    print(pComb)
    dev.off()} 
  if(names(dfList)[i] == "direct_only"){
    pdf(file = paste0("figures/appendix/appendix_fig_b5.pdf"), 
        width = 10, height = 6)
    print(pComb)
    dev.off()} 
  
  
} # Close i loop


save.image(file = "results/fig_4.RData")
